Library loading¶

In [1]:
import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
import warnings
import socket
import holoviews as hv
import plotly.express as px

from matplotlib import pylab
import sys
import yaml
import os
from pandas.api.types import CategoricalDtype
import plotly
import scvelo as scv


import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')
In [2]:
plotly.offline.init_notebook_mode()
In [3]:
import ipynbname
nb_fname = ipynbname.name()
In [4]:
%matplotlib inline
In [5]:
sc.settings.verbosity = 3         # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=50, facecolor='white')
pylab.rcParams['figure.figsize'] = (9, 9)
outBaseName = "04.1A_Neurons_DA"
figDir = "./figures"
scanpy==1.8.1 anndata==0.7.6 umap==0.4.6 numpy==1.20.2 scipy==1.6.3 pandas==1.2.4 scikit-learn==0.24.2 statsmodels==0.13.1 python-igraph==0.9.8 louvain==0.7.1 pynndescent==0.5.5

Configure paths¶

In [6]:
hostRoot = "-".join(socket.gethostname().split('-')[0:2])

with open(os.path.expanduser('~')+"/paths_config.yaml", 'r') as f:
    paths = yaml.load(f, Loader=yaml.FullLoader)

#indir=paths["paths"]["indir"][hostRoot]
outdir="./outdir"
FinaLeaf="/Neurons"
#projectBaseDir=paths["paths"]["projectBaseDir"][hostRoot]
with open("./colorMap.yaml", 'r') as f:
    colorMap = yaml.load(f, Loader=yaml.FullLoader)["uns_colors"]
colorMap
Out[6]:
{'medial': {'color': '#CD5C5C'},
 'distal': {'color': '#FFCBCB'},
 'proximal': {'color': '#8D021F'},
 'piece1': {'color': '#281E5D'},
 'piece2': {'color': '#3779FF'},
 'piece3': {'color': '#BFD4FF'},
 'control': {'color': '#0056D1'},
 'polaroid': {'color': '#DE001E'},
 'enriched': {'color': '#DE001E'},
 'not_enriched': {'color': '#0056D1'},
 'pfc': {'color': '#DE001E'},
 'somatosensory': {'color': '#E5E4E2'},
 'temporal': {'color': '#0056D1'},
 'motor': {'color': '#37F7C8'},
 'v1': {'color': '#28F30C'},
 'parietal': {'color': '#D41FFC'}}
In [7]:
adataAll = sc.read_h5ad(outdir+"/3_polaroid_quickAnno.h5ad")
In [8]:
adataAll
Out[8]:
AnnData object with n_obs × n_vars = 18822 × 28371
    obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed', 'leiden0.3', 'leidenAnno'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'mean', 'std'
    uns: 'dataset_colors', 'dendrogram_leidenAnno', 'diffmap_evals', 'draw_graph', 'is.Stressed_colors', 'leiden', 'leiden0.3_colors', 'leidenAnno_colors', 'neighbors', 'organoid_colors', 'pca', 'rank_genes_groups', 'regionContrast_colors', 'region_colors', 'type_colors', 'umap'
    obsm: 'X_diffmap', 'X_draw_graph_fa', 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
In [9]:
scv.tl.score_genes_cell_cycle(adataAll)
calculating cell cycle phase
computing score 'S_score'
    finished: added
    'S_score', score of gene set (adata.obs).
    543 total control genes are used. (0:00:00)
computing score 'G2M_score'
    finished: added
    'G2M_score', score of gene set (adata.obs).
    415 total control genes are used. (0:00:00)
-->     'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
In [10]:
sc.pl.umap(adataAll, color=["leiden0.3","phase"] ,size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)
... storing 'phase' as categorical

UMAP¶

In [11]:
sc.pl.umap(adataAll, color="leiden0.3", groups=["2","8"] ,size = 30, add_outline = True,outline_width=(0.2, 0.05), frameon=False)

go back to rawdata and clean¶

In [12]:
adata = adataAll.raw.to_adata().copy()
In [13]:
del adata.var["highly_variable"]
adata.obs = adataAll[adata.obs_names].obs.copy()
del adata.uns
del adata.obsm
adata = adata[adata.obs["leiden0.3"].isin(["2","8"])]

for md in [md for md in adata.obs.columns if "leiden" in md and md != "leiden0.3"]:
    
    del adata.obs[md]

adata.uns["regionContrast_colors"] = [colorMap[i]["color"] for i in adata.obs["regionContrast"].cat.categories]
adata.uns["region_colors"] = [colorMap[i]["color"] for i in adata.obs["region"].cat.categories]
adata.uns["type_colors"] = [colorMap[i]["color"] for i in adata.obs["type"].cat.categories]
Trying to set attribute `.uns` of view, copying.
In [14]:
adata
Out[14]:
AnnData object with n_obs × n_vars = 3650 × 28371
    obs: 'dataset', 'organoid', 'region', 'type', 'type_region', 'regionContrast', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'n_genes', 'is.Stressed', 'leiden0.3', 'S_score', 'G2M_score', 'phase'
    var: 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells'
    uns: 'regionContrast_colors', 'region_colors', 'type_colors'

Sankey of leiden per type¶

In [15]:
obsTupleToMap = ("region","leiden0.3")
SankeyDF=adata.obs[list(obsTupleToMap)]
SankeyDF.columns = ["region","leiden0.3"]
SankeyDF = SankeyDF.groupby(['region','leiden0.3']).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')



sankey1 = hv.Sankey(SankeyDF, kdims=["region", "leiden0.3"], vdims="count")



sankey1.opts(cmap='Colorblind',label_position='left',
edge_color='region', edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=True,
width=900, height=700, bgcolor="snow")
Out[15]:

Re HVGs¶

Vertical intersection¶

In [16]:
# Given the epxloratory phase as many genes were retained
VERTICAL_HVGs = {}
for group in adata.obs.type_region.unique():
    adata_group = adata[adata.obs.type_region == group].copy()
    HVGsDF = sc.pp.highly_variable_genes(adata_group, min_mean=0.0125, max_mean=3, min_disp=0.5, inplace=False, batch_key="organoid")
    VERTICAL_HVGs[group]  = set(HVGsDF[HVGsDF["highly_variable_nbatches"] == 3].index)
    
VERTICAL_HVGs = set.union(*list(VERTICAL_HVGs.values()))
extracting highly variable genes
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
In [17]:
len(VERTICAL_HVGs)
Out[17]:
2693

Horizontal intersection¶

In [18]:
import itertools

# Setting up contrasts
proximal = ["polaroid1_proximal","polaroid2_proximal","polaroid3_proximal"]
medial = ["polaroid1_medial","polaroid2_medial","polaroid3_medial"]
distal = ["polaroid1_distal","polaroid2_distal","polaroid3_distal"]

p1 = ["control1_piece1","control2_piece1","control3_piece1"]
p2 = ["control1_piece2","control2_piece2","control3_piece2"]
p3 = ["control1_piece3","control2_piece3","control3_piece3"]

proximal_vs_medial  = list(itertools.product(proximal, medial))
medial_vs_distal  = list(itertools.product(medial, distal))

p1_vs_p2 = list(itertools.product(p1, p2))
p2_vs_p3 = list(itertools.product(p2, p3))
In [19]:
HORIZONTAL_HVGs = {}


# Proximal vs distal regions
# Proximal vs distal regions
# Proximal vs distal regions
In [20]:
proximal_vs_medial_HVGs = {}

for contrast in proximal_vs_medial:
    
    adataContrast = adata[adata.obs["dataset"].isin(list(contrast))].copy()
    print(adataContrast.obs.dataset.value_counts())

    sc.pp.highly_variable_genes(adataContrast, min_mean=0.0125, max_mean=3, min_disp=0.5)
    HVGs = set(adataContrast.var[adataContrast.var.highly_variable].index.tolist())
    proximal_vs_medial_HVGs["_".join(contrast)] = HVGs

proximal_vs_medial_HVGs = pd.Series(list(itertools.chain.from_iterable([list(i) for i in list(proximal_vs_medial_HVGs.values())])))
proximal_vs_medial_HVGs = set(proximal_vs_medial_HVGs.value_counts()[proximal_vs_medial_HVGs.value_counts() == 9].index.tolist())
HORIZONTAL_HVGs["proximal_vs_medial_HVGs"] = proximal_vs_medial_HVGs

    
# medial vs distal regions
# medial vs distal regions
# medial vs distal regions

medial_vs_distal_HVGs = {}

for contrast in medial_vs_distal:
    
    adataContrast = adata[adata.obs["dataset"].isin(list(contrast))].copy()
    print(adataContrast.obs.dataset.value_counts())

    
    sc.pp.highly_variable_genes(adataContrast, min_mean=0.0125, max_mean=3, min_disp=0.5)
    HVGs = set(adataContrast.var[adataContrast.var.highly_variable].index.tolist())
    medial_vs_distal_HVGs["_".join(contrast)] = HVGs

medial_vs_distal_HVGs = pd.Series(list(itertools.chain.from_iterable([list(i) for i in list(medial_vs_distal_HVGs.values())])))
medial_vs_distal_HVGs = set(medial_vs_distal_HVGs.value_counts()[medial_vs_distal_HVGs.value_counts() == 9].index.tolist())
HORIZONTAL_HVGs["medial_vs_distal_HVGs"] = medial_vs_distal_HVGs


# P1 vs P2
# P1 vs P2
# P1 vs P2

p1_vs_p2_HVGs = {}

for contrast in p1_vs_p2:
    
    adataContrast = adata[adata.obs["dataset"].isin(list(contrast))].copy()
    print(adataContrast.obs.dataset.value_counts())
    
    sc.pp.highly_variable_genes(adataContrast, min_mean=0.0125, max_mean=3, min_disp=0.5)
    HVGs = set(adataContrast.var[adataContrast.var.highly_variable].index.tolist())
    p1_vs_p2_HVGs["_".join(contrast)] = HVGs

p1_vs_p2_HVGs = pd.Series(list(itertools.chain.from_iterable([list(i) for i in list(p1_vs_p2_HVGs.values())])))
p1_vs_p2_HVGs = set(p1_vs_p2_HVGs.value_counts()[p1_vs_p2_HVGs.value_counts() == 9].index.tolist())
HORIZONTAL_HVGs["p1_vs_p2_HVGs"] = p1_vs_p2_HVGs



# P2 vs P3
# P2 vs P3
# P2 vs P3

p2_vs_p3_HVGs = {}

for contrast in p2_vs_p3:
    
    adataContrast = adata[adata.obs["dataset"].isin(list(contrast))].copy()
    print(adataContrast.obs.dataset.value_counts())

    sc.pp.highly_variable_genes(adataContrast, min_mean=0.0125, max_mean=3, min_disp=0.5)
    HVGs = set(adataContrast.var[adataContrast.var.highly_variable].index.tolist())
    p2_vs_p3_HVGs["_".join(contrast)] = HVGs

p2_vs_p3_HVGs = pd.Series(list(itertools.chain.from_iterable([list(i) for i in list(p2_vs_p3_HVGs.values())])))
p2_vs_p3_HVGs = set(p2_vs_p3_HVGs.value_counts()[p2_vs_p3_HVGs.value_counts() == 9].index.tolist())
HORIZONTAL_HVGs["p2_vs_p3_HVGs"] = p2_vs_p3_HVGs
polaroid1_proximal    479
polaroid1_medial      148
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_proximal    479
polaroid2_medial       83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_proximal    479
polaroid3_medial       43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_proximal    350
polaroid1_medial      148
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_proximal    350
polaroid2_medial       83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_proximal    350
polaroid3_medial       43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid3_proximal    284
polaroid1_medial      148
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid3_proximal    284
polaroid2_medial       83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid3_proximal    284
polaroid3_medial       43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_distal    342
polaroid1_medial    148
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_medial    148
polaroid2_distal     83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_medial    148
polaroid3_distal     63
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_distal    342
polaroid2_medial     83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_distal    83
polaroid2_medial    83
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_medial    83
polaroid3_distal    63
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid1_distal    342
polaroid3_medial     43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid2_distal    83
polaroid3_medial    43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
polaroid3_distal    63
polaroid3_medial    43
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control1_piece2    110
control1_piece1     84
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece2    135
control1_piece1     84
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece2    300
control1_piece1     84
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece1    221
control1_piece2    110
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece1    221
control2_piece2    135
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece2    300
control2_piece1    221
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece1    281
control1_piece2    110
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece1    281
control2_piece2    135
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece2    300
control3_piece1    281
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control1_piece3    134
control1_piece2    110
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece3    139
control1_piece2    110
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece3    371
control1_piece2    110
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece2    135
control1_piece3    134
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control2_piece3    139
control2_piece2    135
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece3    371
control2_piece2    135
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece2    300
control1_piece3    134
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece2    300
control2_piece3    139
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
control3_piece3    371
control3_piece2    300
Name: dataset, dtype: int64
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)

Combine all HVGs¶

In [21]:
HORIZONTAL_HVGs = set.union(*list(HORIZONTAL_HVGs.values()))
In [22]:
JointHVGs = list(HORIZONTAL_HVGs.union(VERTICAL_HVGs))
In [23]:
adata.var["highly_variable"] = adata.var_names.isin(JointHVGs)
In [24]:
adata.var["highly_variable"].sum()
Out[24]:
2704

Dataset Preprocessing¶

Neighbors¶

In [25]:
sc.pp.pca(adata, use_highly_variable=True)
computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:00)
In [26]:
sc.pp.neighbors(adata, n_neighbors=50, n_pcs=10)
computing neighbors
    using 'X_pca' with n_pcs = 10
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:02)
In [27]:
sc.tl.leiden(adata, resolution=.4, key_added="subLeiden")
running Leiden clustering
    finished: found 5 clusters and added
    'subLeiden', the cluster labels (adata.obs, categorical) (0:00:00)
In [28]:
sc.tl.umap(adata)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:06)
In [29]:
sc.tl.diffmap(adata)
computing Diffusion Maps using n_comps=15(=n_dcs)
computing transitions
    finished (0:00:00)
    eigenvalues of transition matrix
    [1.         0.9754858  0.9639816  0.9497826  0.94473535 0.94054
     0.92622113 0.9179847  0.9113687  0.9067488  0.90024513 0.8944447
     0.88261294 0.87642825 0.86645806]
    finished: added
    'X_diffmap', diffmap coordinates (adata.obsm)
    'diffmap_evals', eigenvalues of transition matrix (adata.uns) (0:00:00)
In [30]:
sc.tl.draw_graph(adata, n_jobs=4)
drawing single-cell graph using layout 'fa'
    finished: added
    'X_draw_graph_fa', graph_drawing coordinates (adata.obsm) (0:00:18)
In [31]:
sc.pl.umap(adata, color=["organoid","type","dataset","subLeiden","MKI67"], size = 100, add_outline = True,outline_width=(0.2, 0.05), ncols=3 ,
           save=nb_fname+".UMAP.pdf")
WARNING: saving figure to file figures/umapNeurons01_Selection.UMAP.pdf
In [32]:
sc.pl.diffmap(adata, color=["organoid","type","dataset","subLeiden","MKI67","VIM","DCX","GAP43","PAX6","FOXG1","FGF8"], size = 50, add_outline = True,outline_width=(0.2, 0.05), ncols=3)
In [33]:
sc.pl.draw_graph(adata, color=["organoid","type","dataset","subLeiden","MKI67","VIM","DCX","GAP43","PAX6","FOXG1","FGF8"], size = 50, add_outline = True,outline_width=(0.2, 0.05), ncols=3)

Plot per type¶

In [34]:
sc.tl.embedding_density(adata, basis='umap', groupby='type')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_type')
computing density on 'umap'
--> added
    'umap_density_type', densities (adata.obs)
    'umap_density_type_params', parameter (adata.uns)

Plot per region¶

In [35]:
sc.tl.embedding_density(adata, basis='umap', groupby='region')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_region')
computing density on 'umap'
--> added
    'umap_density_region', densities (adata.obs)
    'umap_density_region_params', parameter (adata.uns)

Plot per organoid¶

In [36]:
sc.tl.embedding_density(adata, basis='umap', groupby='organoid')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_organoid')
computing density on 'umap'
--> added
    'umap_density_organoid', densities (adata.obs)
    'umap_density_organoid_params', parameter (adata.uns)
In [37]:
adata.write_h5ad(outdir+FinaLeaf+"/4A_Neurons_DA.h5ad")